function stat=actor_critic(stat)
% ACTOR_CRITIC  Perform one update of stat.beta and stat.J.
%
%   stat = actor_critic(stat)
%
%   Fields read from stat:
%     J             - vector of per-state values
%     beta          - table indexed as beta(state, action)
%     current_state - index of the state just entered
%     old_state     - index of the state that was left
%     old_action    - index of the action taken in old_state
%     rimm          - immediate reward of the transition
%     iter          - number of updates performed so far
%
%   Fields written to stat:
%     iter                     - incremented by one
%     beta(old_state,old_action), J(old_state)
%                              - blended towards rimm - J(1) + J(current_state)
%     J_plot1..J_plot5, iter_plot
%                              - every 100th update, entry k = iter/100 gets a
%                                snapshot of J(1)..J(5) and the value k

% Declared but not referenced below; kept so the declaration side effect
% on the global workspace is unchanged.
global NO_REPLICATIONS ITERMAX NA NS SMALL TPM TRM LAMBDA

prev_s = stat.old_state;
prev_a = stat.old_action;

% All three values are read BEFORE anything in stat.J is modified.
% J(1) is subtracted as a reference value -- presumably the distinguished
% state of a relative-value scheme; confirm against the caller.
value_next = stat.J(stat.current_state);
value_ref  = stat.J(1);
value_prev = stat.J(prev_s);

stat.iter = stat.iter + 1;
n = stat.iter;

% Two different step sizes: one for beta, one for J.
% (A plain 1/n schedule was tried earlier and is disabled.)
step_beta = 0.5*300/(300+n);
step_J    = log(n+1)/(n+1);

% Common target of both updates.
target = stat.rimm - value_ref + value_next;

% Update of the (state, action) table entry.
stat.beta(prev_s,prev_a) = (1-step_beta)*stat.beta(prev_s,prev_a) + step_beta*target;

% Update of the per-state value (an earlier variant guarded this with
% stat.flag==0; that guard is disabled).
stat.J(prev_s) = (1-step_J)*value_prev + step_J*target;

% Every 100 updates, record the first five J values for plotting.
if mod(n,100)==0
    k = n/100;
    for s = 1:5
        field_name = sprintf('J_plot%d', s);
        stat.(field_name)(k) = stat.J(s);
    end
    stat.iter_plot(k) = k;
end

    






